library(haven)
library(readr)
library(data.table)
library(scales)
library(dplyr)
library(lfe)
library(stargazer)
# Load the cohort file; every statistic below is computed from this table.
cohort <- fread("H:/Zheng_10223/Joint/cohort_2025.csv")

# Gender ----
# Row indices of children whose parental income rank
# (MainParent_Income_HH_MainParentAge45_49_pctparent) is not missing, split by
# refugee status. which() discards rows where the condition evaluates to NA.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)
is_refugee <- cohort$ImmigrationCategory == "Refugee"
refugee_rows <- which(is_refugee & has_parent_rank)
other_rows <- which(!is_refugee & has_parent_rank)

# Number of children with Gender_Child == 1: refugees, then all others.
sum(cohort$Gender_Child[refugee_rows] == 1, na.rm = TRUE)
sum(cohort$Gender_Child[other_rows] == 1, na.rm = TRUE)

# Number of children with Gender_Child == 2: refugees, then all others.
sum(cohort$Gender_Child[refugee_rows] == 2, na.rm = TRUE)
sum(cohort$Gender_Child[other_rows] == 2, na.rm = TRUE)

# Mean and standard deviation of FemaleChild (refugees first, others second).
cohort$FemaleChild <- as.numeric(cohort$FemaleChild)
c(
  mean(cohort$FemaleChild[refugee_rows], na.rm = TRUE),
  mean(cohort$FemaleChild[other_rows], na.rm = TRUE)
)
c(
  sd(cohort$FemaleChild[refugee_rows], na.rm = TRUE),
  sd(cohort$FemaleChild[other_rows], na.rm = TRUE)
)

# Sample sizes: refugees, then all others.
length(refugee_rows)
length(other_rows)


# East Asian stats ----
# For each value of IntendedOccupation_Main: the mean parental income rank
# (missing ranks ignored) and the number of rows. Computed once for children
# with WORLD_AREA_BIRTH == "Eastern Asia" and once for every other non-missing
# birth area (filter() drops rows where the comparison is NA).
eastasiantab <- cohort %>%
  filter(WORLD_AREA_BIRTH == "Eastern Asia") %>%
  group_by(IntendedOccupation_Main) %>%
  summarize(
    avgincomepct = mean(
      MainParent_Income_HH_MainParentAge45_49_pctparent,
      na.rm = TRUE
    ),
    count = n()
  )

noneastasiantab <- cohort %>%
  filter(WORLD_AREA_BIRTH != "Eastern Asia") %>%
  group_by(IntendedOccupation_Main) %>%
  summarize(
    avgincomepct = mean(
      MainParent_Income_HH_MainParentAge45_49_pctparent,
      na.rm = TRUE
    ),
    count = n()
  )

# Export both tables.
write.csv(eastasiantab, "H:/Zheng_10223/ToVet/Output/eastasiantab.csv")
write.csv(noneastasiantab, "H:/Zheng_10223/ToVet/Output/noneastasiantab.csv")

# Fraction landing at age 5 or younger ----
# Both counts are taken over children with a non-missing parental income rank.
# NOTE(review): the original heading said "before age 5" but the condition is
# LANDING_AGE <= 5 (age 5 included); confirm which is intended.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)

# Numerator: children with LANDING_AGE <= 5 (rows with NA landing age are not
# counted).
sum(cohort$LANDING_AGE <= 5 & has_parent_rank, na.rm = TRUE)

# Denominator: number of children with a non-missing parental rank.
# sum() is required here: length() of a logical vector is the total number of
# rows, regardless of how many entries are TRUE.
sum(has_parent_rank)

# Pooled rank-rank regression: child's individual income rank at 30-34 on the
# main parent's household income rank at 45-49.
model_all <- lm(
  Child_Income_IND_30_34_pct ~ MainParent_Income_HH_MainParentAge45_49_pctparent,
  data = cohort
)
summary(model_all)

# 



#### TABLE 1: SUMMARY STATS ####

## Child individual income rank (30-34) regressed on the main parent's
## household income rank (45-49), estimated separately for refugees and for
## all other immigration categories. Rows with a missing ImmigrationCategory
## fall in neither sample.
refugeesind <- lm(
  Child_Income_IND_30_34_pct ~ MainParent_Income_HH_MainParentAge45_49_pctparent,
  data = filter(cohort, ImmigrationCategory == "Refugee")
)
othersind <- lm(
  Child_Income_IND_30_34_pct ~ MainParent_Income_HH_MainParentAge45_49_pctparent,
  data = filter(cohort, ImmigrationCategory != "Refugee")
)

stargazer(refugeesind, othersind)

# Number of children with a non-missing parental rank: refugees, then others.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)
sum(cohort$ImmigrationCategory == "Refugee" & has_parent_rank, na.rm = TRUE)

sum(cohort$ImmigrationCategory != "Refugee" & has_parent_rank, na.rm = TRUE)
# CHILD ----
# Children with missing parental rank are dropped throughout this section
# because they are not included in the regressions.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)
is_refugee <- cohort$ImmigrationCategory == "Refugee"
refugee_rows <- which(is_refugee & has_parent_rank)
other_rows <- which(!is_refugee & has_parent_rank)

# Landing age: mean, then standard deviation (refugees first, others second).
c(
  mean(cohort$LANDING_AGE[refugee_rows]),
  mean(cohort$LANDING_AGE[other_rows])
)
c(
  sd(cohort$LANDING_AGE[refugee_rows]),
  sd(cohort$LANDING_AGE[other_rows])
)

# World area of birth: share of each group born in each area.
birth_area_refugee <- cohort$WORLD_AREA_BIRTH[refugee_rows]
birth_area_other <- cohort$WORLD_AREA_BIRTH[other_rows]

c(
  mean(birth_area_refugee == "Africa and Middle East"),
  mean(birth_area_other == "Africa and Middle East")
)
c(
  mean(birth_area_refugee == "Eastern Asia"),
  mean(birth_area_other == "Eastern Asia")
)
c(
  mean(birth_area_refugee == "Europe"),
  mean(birth_area_other == "Europe")
)
c(
  mean(birth_area_refugee == "Oceania and other Asia"),
  mean(birth_area_other == "Oceania and other Asia")
)
c(
  mean(birth_area_refugee == "South and Central America"),
  mean(birth_area_other == "South and Central America")
)
c(
  mean(birth_area_refugee == "Southern Asia"),
  mean(birth_area_other == "Southern Asia")
)
c(
  mean(birth_area_refugee == "US or other"),
  mean(birth_area_other == "US or other")
)



# Supporting counts for the birth-area shares: number of children with a
# non-missing parental rank in each refugeein x WORLD_AREA_BIRTH cell.
world_area_counts <- cohort %>%
  filter(!is.na(MainParent_Income_HH_MainParentAge45_49_pctparent)) %>%
  group_by(refugeein, WORLD_AREA_BIRTH) %>%
  summarize(count = n())
write.csv(world_area_counts, "H:/Zheng_10223/ToVet/Table1_supporting_birth.csv")

# Landing metro area ----
# Share of each group whose DESTINATION_CMA is one of the five listed metros
# (refugees first, others second).
#
# The shares are restricted to children with a non-missing parental rank so
# that they describe the same sample as the supporting counts written below
# (which apply that filter) and as the other child statistics in this table.
# NOTE(review): the shares were previously computed on the full cohort, so
# they did not correspond to the supporting counts; confirm that the
# regression sample is the intended base.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)
is_refugee <- cohort$ImmigrationCategory == "Refugee"
cma_refugee <- cohort$DESTINATION_CMA[which(is_refugee & has_parent_rank)]
cma_other <- cohort$DESTINATION_CMA[which(!is_refugee & has_parent_rank)]

c(mean(cma_refugee == "Toronto"), mean(cma_other == "Toronto"))
c(mean(cma_refugee == "Montreal"), mean(cma_other == "Montreal"))
c(mean(cma_refugee == "Vancouver"), mean(cma_other == "Vancouver"))
c(mean(cma_refugee == "Calgary"), mean(cma_other == "Calgary"))
c(mean(cma_refugee == "Edmonton"), mean(cma_other == "Edmonton"))

# Supporting counts: children with a non-missing parental rank in each
# refugeein x DESTINATION_CMA cell, for the five metros above only.
landing_metro_counts <- cohort %>%
  filter(
    !is.na(MainParent_Income_HH_MainParentAge45_49_pctparent) &
      DESTINATION_CMA %in%
        c("Toronto", "Montreal", "Edmonton", "Calgary", "Vancouver")
  ) %>%
  group_by(refugeein, DESTINATION_CMA) %>%
  summarize(count = n())
write.csv(landing_metro_counts, "H:/Zheng_10223/ToVet/Table1_supporting_metro.csv")

# Child outcomes ----
# Every pair below is (refugees, others), restricted to children with a
# non-missing parental rank. No na.rm is applied, so a missing outcome in a
# group makes that group's statistic NA.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)
is_refugee <- cohort$ImmigrationCategory == "Refugee"
refugee_rows <- which(is_refugee & has_parent_rank)
other_rows <- which(!is_refugee & has_parent_rank)

# Individual income at 30-34: mean, then standard deviation.
c(
  mean(cohort$Child_Income_IND_30_34[refugee_rows]),
  mean(cohort$Child_Income_IND_30_34[other_rows])
)
c(
  sd(cohort$Child_Income_IND_30_34[refugee_rows]),
  sd(cohort$Child_Income_IND_30_34[other_rows])
)

# Individual income percentile at 30-34: mean, then standard deviation.
c(
  mean(cohort$Child_Income_IND_30_34_pct[refugee_rows]),
  mean(cohort$Child_Income_IND_30_34_pct[other_rows])
)
c(
  sd(cohort$Child_Income_IND_30_34_pct[refugee_rows]),
  sd(cohort$Child_Income_IND_30_34_pct[other_rows])
)

# Household income at 30-34, level and percentile: mean, then standard
# deviation for each.
c(
  mean(cohort$Child_Income_HH_30_34[refugee_rows]),
  mean(cohort$Child_Income_HH_30_34[other_rows])
)
c(
  sd(cohort$Child_Income_HH_30_34[refugee_rows]),
  sd(cohort$Child_Income_HH_30_34[other_rows])
)
c(
  mean(cohort$Child_Income_HH_30_34_pct[refugee_rows]),
  mean(cohort$Child_Income_HH_30_34_pct[other_rows])
)
c(
  sd(cohort$Child_Income_HH_30_34_pct[refugee_rows]),
  sd(cohort$Child_Income_HH_30_34_pct[other_rows])
)



# Parental variables ----
# A missing HasMother / HasFather flag is recoded to 0.
cohort$HasMother[is.na(cohort$HasMother)] <- 0
cohort$HasFather[is.na(cohort$HasFather)] <- 0

# Every pair below is (refugees, others), restricted to children with a
# non-missing parental rank.
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)
is_refugee <- cohort$ImmigrationCategory == "Refugee"
refugee_rows <- which(is_refugee & has_parent_rank)
other_rows <- which(!is_refugee & has_parent_rank)

# Share with a father, then share with a mother.
c(
  mean(cohort$HasFather[refugee_rows] == 1),
  mean(cohort$HasFather[other_rows] == 1)
)
c(
  mean(cohort$HasMother[refugee_rows] == 1),
  mean(cohort$HasMother[other_rows] == 1)
)

# Share with both a father and a mother.
c(
  mean(cohort$HasFather[refugee_rows] == 1 & cohort$HasMother[refugee_rows] == 1),
  mean(cohort$HasFather[other_rows] == 1 & cohort$HasMother[other_rows] == 1)
)

# Share with gender_MainParent == 1 (headed "main parent is father" in the
# original script).
c(
  mean(cohort$gender_MainParent[refugee_rows] == 1),
  mean(cohort$gender_MainParent[other_rows] == 1)
)

# Language: mean of AnyEnglish_Main, then mean of AnyFrench_Main.
c(
  mean(cohort$AnyEnglish_Main[refugee_rows]),
  mean(cohort$AnyEnglish_Main[other_rows])
)
c(
  mean(cohort$AnyFrench_Main[refugee_rows]),
  mean(cohort$AnyFrench_Main[other_rows])
)

# Parental income and schooling ----
# Every pair below is (refugees, others).
is_refugee <- cohort$ImmigrationCategory == "Refugee"
has_parent_rank <-
  !is.na(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent)

# Rows with a non-missing parental rank (used for the income level at 45-49).
refugee_rows <- which(is_refugee & has_parent_rank)
other_rows <- which(!is_refugee & has_parent_rank)

# All rows of each group, with no restriction on the parental rank; the
# statistics that use these drop missing values of the variable itself.
all_refugee_rows <- which(is_refugee)
all_other_rows <- which(!is_refugee)

# HH income, main parent aged 45-49: mean, then standard deviation.
c(
  mean(cohort$MainParent_Income_HH_MainParentAge45_49[refugee_rows]),
  mean(cohort$MainParent_Income_HH_MainParentAge45_49[other_rows])
)
c(
  sd(cohort$MainParent_Income_HH_MainParentAge45_49[refugee_rows]),
  sd(cohort$MainParent_Income_HH_MainParentAge45_49[other_rows])
)

# HH income rank among parents, main parent aged 45-49.
c(
  mean(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent[all_refugee_rows], na.rm = TRUE),
  mean(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent[all_other_rows], na.rm = TRUE)
)
c(
  sd(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent[all_refugee_rows], na.rm = TRUE),
  sd(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent[all_other_rows], na.rm = TRUE)
)

# HH income when the child is 15-19: level, then percentile.
c(
  mean(cohort$MainParent_Income_HH_15_19[all_refugee_rows], na.rm = TRUE),
  mean(cohort$MainParent_Income_HH_15_19[all_other_rows], na.rm = TRUE)
)
c(
  sd(cohort$MainParent_Income_HH_15_19[all_refugee_rows], na.rm = TRUE),
  sd(cohort$MainParent_Income_HH_15_19[all_other_rows], na.rm = TRUE)
)
c(
  mean(cohort$MainParent_Income_HH_15_19_pct[all_refugee_rows], na.rm = TRUE),
  mean(cohort$MainParent_Income_HH_15_19_pct[all_other_rows], na.rm = TRUE)
)
c(
  sd(cohort$MainParent_Income_HH_15_19_pct[all_refugee_rows], na.rm = TRUE),
  sd(cohort$MainParent_Income_HH_15_19_pct[all_other_rows], na.rm = TRUE)
)

# HH income 10 years after landing: level, then rank among parents.
c(
  mean(cohort$MainParent_Income_HH_PostLanding10[all_refugee_rows], na.rm = TRUE),
  mean(cohort$MainParent_Income_HH_PostLanding10[all_other_rows], na.rm = TRUE)
)
c(
  sd(cohort$MainParent_Income_HH_PostLanding10[all_refugee_rows], na.rm = TRUE),
  sd(cohort$MainParent_Income_HH_PostLanding10[all_other_rows], na.rm = TRUE)
)

c(
  mean(cohort$MainParent_Income_HH_PostLanding10_pctparent[all_refugee_rows], na.rm = TRUE),
  mean(cohort$MainParent_Income_HH_PostLanding10_pctparent[all_other_rows], na.rm = TRUE)
)
c(
  sd(cohort$MainParent_Income_HH_PostLanding10_pctparent[all_refugee_rows], na.rm = TRUE),
  sd(cohort$MainParent_Income_HH_PostLanding10_pctparent[all_other_rows], na.rm = TRUE)
)

# Maximum years of schooling among the parents: mean, then standard deviation.
c(
  mean(cohort$YEARS_OF_SCHOOLING_PARENT_MAX[all_refugee_rows], na.rm = TRUE),
  mean(cohort$YEARS_OF_SCHOOLING_PARENT_MAX[all_other_rows], na.rm = TRUE)
)
c(
  sd(cohort$YEARS_OF_SCHOOLING_PARENT_MAX[all_refugee_rows], na.rm = TRUE),
  sd(cohort$YEARS_OF_SCHOOLING_PARENT_MAX[all_other_rows], na.rm = TRUE)
)

# Share with at least 16 years of schooling (flag is NA when schooling is NA).
cohort$Schooling16 <- cohort$YEARS_OF_SCHOOLING_PARENT_MAX >= 16
c(
  mean(cohort$Schooling16[all_refugee_rows], na.rm = TRUE),
  mean(cohort$Schooling16[all_other_rows], na.rm = TRUE)
)

# Intended occupation ----
# Collapse the codes in SKILL_LEVEL_CD11_Main into five occupation groups.
# The code sets do not overlap; any other code, or a missing one, stays NA.
skill_code <- cohort$SKILL_LEVEL_CD11_Main
cohort$IntendedOccupation <- case_when(
  skill_code %in% c("0", "A") ~ "Managerial/Professional",
  skill_code %in% c("B") ~ "Skilled and Technical",
  skill_code %in% c("C", "D") ~ "Clerical and Laborers",
  skill_code %in% c("N") ~ "New Workers",
  skill_code %in% c("O", "R", "S") ~ "Non-Workers"
)

# Share of each group in each occupation category (refugees first, others
# second), among rows with a non-missing category. These shares are not
# restricted to rows with a non-missing parental rank.
occupation_refugee <-
  cohort$IntendedOccupation[which(cohort$ImmigrationCategory == "Refugee")]
occupation_other <-
  cohort$IntendedOccupation[which(cohort$ImmigrationCategory != "Refugee")]

c(
  mean(occupation_refugee == "Managerial/Professional", na.rm = TRUE),
  mean(occupation_other == "Managerial/Professional", na.rm = TRUE)
)
c(
  mean(occupation_refugee == "Skilled and Technical", na.rm = TRUE),
  mean(occupation_other == "Skilled and Technical", na.rm = TRUE)
)
c(
  mean(occupation_refugee == "Clerical and Laborers", na.rm = TRUE),
  mean(occupation_other == "Clerical and Laborers", na.rm = TRUE)
)
c(
  mean(occupation_refugee == "New Workers", na.rm = TRUE),
  mean(occupation_other == "New Workers", na.rm = TRUE)
)
c(
  mean(occupation_refugee == "Non-Workers", na.rm = TRUE),
  mean(occupation_other == "Non-Workers", na.rm = TRUE)
)

# Supporting counts: rows in each refugeein x IntendedOccupation cell.
occupation_counts <- cohort %>%
  group_by(refugeein, IntendedOccupation) %>%
  summarize(counts = n())
write.csv(occupation_counts, "H:/Zheng_10223/ToVet/Table1_supporting_occupation.csv")


# Household income at landing and 20 years after landing ----
# Builds two cohort columns, MainParent_Income_HH_AtLanding and
# MainParent_Income_HH_PostLanding20, from the child-year panel.

# Read the panel and attach each main parent's landing year from the cohort.
# merge() keeps only matching rows by default, so panel rows whose MAIN_PARENT
# is absent from the cohort are dropped.
# NOTE(review): if the cohort holds several rows per MAIN_PARENT this merge
# duplicates panel rows; the unique() calls below appear to rely on that being
# harmless - confirm.
panel <- fread("H:/Zheng_10223/Joint/child_year_panel.csv")
panel <- merge(x=panel, y=cohort[,c("MAIN_PARENT", "LandingYear_MainParent")], by.x="MAIN_PARENT", by.y="MAIN_PARENT")
panel <- data.table(panel)
# Attach the CPI for each panel year (all panel rows kept), then rescale
# MainParent_TIRC_HH by the ratio of the 2020 CPI to that year's CPI.
cpi <- fread("H:/Zheng_10223/Joint/cpi/cpi.csv")
panel <- merge(x=panel, y=cpi, by.x="Year", by.y="year", all.x=T)
panel[,c("MainParent_TIRC_HH")] <- panel[,c("MainParent_TIRC_HH")]*cpi$cpi[which(cpi$year==2020)]/panel$cpi
# First merge adds the value observed 20 years after the landing year; second
# merge adds the value observed in the landing year. In the second merge both
# tables carry MainParent_TIRC_HH, so merge() suffixes them .x (already on
# cohort: landing year + 20) and .y (landing year). This assumes cohort had no
# MainParent_TIRC_HH column beforehand - TODO confirm.
cohort <- merge(x=cohort,  y=unique(panel[Year==LandingYear_MainParent+20, c("MAIN_PARENT", "MainParent_TIRC_HH")]), by.x="MAIN_PARENT", by.y="MAIN_PARENT", all.x=T)
cohort <- merge(x=cohort,  y=unique(panel[Year==LandingYear_MainParent, c("MAIN_PARENT", "MainParent_TIRC_HH")]), by.x="MAIN_PARENT", by.y="MAIN_PARENT", all.x=T)
setnames(cohort, old=c("MainParent_TIRC_HH.x", "MainParent_TIRC_HH.y"), new=c("MainParent_Income_HH_PostLanding20", "MainParent_Income_HH_AtLanding"))
# Clean both columns the same way: negative values become NA, then values at
# or above the 99.9th percentile are set to that percentile.
cohort$MainParent_Income_HH_PostLanding20[which(cohort$MainParent_Income_HH_PostLanding20<0)] <- NA
cohort$MainParent_Income_HH_PostLanding20[which(cohort$MainParent_Income_HH_PostLanding20>=quantile(cohort$MainParent_Income_HH_PostLanding20, 0.999, na.rm=T))] <- quantile(cohort$MainParent_Income_HH_PostLanding20, 0.999, na.rm=T)
cohort$MainParent_Income_HH_AtLanding[which(cohort$MainParent_Income_HH_AtLanding<0)] <- NA
cohort$MainParent_Income_HH_AtLanding[which(cohort$MainParent_Income_HH_AtLanding>=quantile(cohort$MainParent_Income_HH_AtLanding, 0.999, na.rm=T))] <- quantile(cohort$MainParent_Income_HH_AtLanding, 0.999, na.rm=T)



# HH income in the landing year: mean, then standard deviation (refugees
# first, others second), ignoring missing incomes. Row indices are computed
# here, after the merges that rebuild cohort.
income_at_landing_refugee <-
  cohort$MainParent_Income_HH_AtLanding[which(cohort$ImmigrationCategory == "Refugee")]
income_at_landing_other <-
  cohort$MainParent_Income_HH_AtLanding[which(cohort$ImmigrationCategory != "Refugee")]
c(
  mean(income_at_landing_refugee, na.rm = TRUE),
  mean(income_at_landing_other, na.rm = TRUE)
)
c(
  sd(income_at_landing_refugee, na.rm = TRUE),
  sd(income_at_landing_other, na.rm = TRUE)
)



################## Private / Public (refugees only)
# Compare refugees with refugeetype "Public" against those with "Private".
refugees <- cohort[cohort$ImmigrationCategory == "Refugee", ]
public_rows <- which(refugees$refugeetype == "Public")
private_rows <- which(refugees$refugeetype == "Private")

# Mean years of schooling of the main parent: public, then private.
mean(refugees$YEARS_OF_SCHOOLING_MainParent[public_rows], na.rm = TRUE)
mean(refugees$YEARS_OF_SCHOOLING_MainParent[private_rows], na.rm = TRUE)

# Group sizes: public, then private.
length(public_rows)
length(private_rows)

# Share in each intended-occupation category (public first, private second),
# among rows with a non-missing category.
occupation_public <- refugees$IntendedOccupation[public_rows]
occupation_private <- refugees$IntendedOccupation[private_rows]

c(
  mean(occupation_public == "Managerial/Professional", na.rm = TRUE),
  mean(occupation_private == "Managerial/Professional", na.rm = TRUE)
)
c(
  mean(occupation_public == "Skilled and Technical", na.rm = TRUE),
  mean(occupation_private == "Skilled and Technical", na.rm = TRUE)
)
c(
  mean(occupation_public == "Clerical and Laborers", na.rm = TRUE),
  mean(occupation_private == "Clerical and Laborers", na.rm = TRUE)
)
c(
  mean(occupation_public == "New Workers", na.rm = TRUE),
  mean(occupation_private == "New Workers", na.rm = TRUE)
)
c(
  mean(occupation_public == "Non-Workers", na.rm = TRUE),
  mean(occupation_private == "Non-Workers", na.rm = TRUE)
)

# Cell counts for three of the categories (public, then private, for each).
sum(occupation_public == "Managerial/Professional", na.rm = TRUE)
sum(occupation_private == "Managerial/Professional", na.rm = TRUE)
sum(occupation_public == "Skilled and Technical", na.rm = TRUE)
sum(occupation_private == "Skilled and Technical", na.rm = TRUE)
sum(occupation_public == "New Workers", na.rm = TRUE)
sum(occupation_private == "New Workers", na.rm = TRUE)


# World area of birth by refugee type ----
# Share of public (first) and private (second) refugees born in each area,
# restricted to rows with a non-missing parental rank.
refugee_has_rank <-
  !is.na(refugees$MainParent_Income_HH_MainParentAge45_49_pctparent)
birth_area_public <-
  refugees$WORLD_AREA_BIRTH[which(refugees$refugeetype == "Public" & refugee_has_rank)]
birth_area_private <-
  refugees$WORLD_AREA_BIRTH[which(refugees$refugeetype == "Private" & refugee_has_rank)]

c(
  mean(birth_area_public == "Africa and Middle East"),
  mean(birth_area_private == "Africa and Middle East")
)
c(
  mean(birth_area_public == "Eastern Asia"),
  mean(birth_area_private == "Eastern Asia")
)
c(
  mean(birth_area_public == "Europe"),
  mean(birth_area_private == "Europe")
)
c(
  mean(birth_area_public == "Oceania and other Asia"),
  mean(birth_area_private == "Oceania and other Asia")
)
c(
  mean(birth_area_public == "South and Central America"),
  mean(birth_area_private == "South and Central America")
)
c(
  mean(birth_area_public == "Southern Asia"),
  mean(birth_area_private == "Southern Asia")
)
c(
  mean(birth_area_public == "US or other"),
  mean(birth_area_private == "US or other")
)



